Load data

[1] 38 [1] TRUE

Metadata

#1.Donors #2.Samples

samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "ununstim"] # Includes same donor 
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor  

metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]

genes_baseline_condition = as.data.frame(genes_counts_ordered[, colnames(genes_counts_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)

#remove samples in genes counts datafile
#genes_counts_cultured <- genes_counts_ordered2[,metadata_cultured$Sample]
experimentBycondition = unique(metadata4deg[,c("Donor_id", "Stimulation")])
#createDT(experimentBycondition)
as.data.frame(t(as.matrix(unclass(  table(experimentBycondition$Stimulation, useNA = "ifany"))))) %>%
kable(row.names = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
unstim ununstim
61 21
#genes_counts_cultured <- genes_counts_ordered2[,metadata_cultured$Sample]
experimentBycondition = unique(metadata4deg[,c("Sample", "Stimulation")])
#createDT(experimentBycondition)
as.data.frame(t(as.matrix(unclass(  table(experimentBycondition$Stimulation, useNA = "ifany"))))) %>%
kable(row.names = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
unstim ununstim
135 42

DREAM model

# The variable to be tested must be a fixed effect
names(metadata4deg) = tolower(names(metadata4deg))

#not corrected for region!

form <- ~ age + (1|donor_id) + stimulation + sex + picard_pct_ribosomal_bases + picard_pct_mrna_bases + picard_pct_percent_duplication + picard_pct_pf_reads_aligned 

# estimate weights using linear mixed model of dream
#vobjDream = voomWithDreamWeights( genes_baseline_condition, form, metadata4deg)

# Fit the dream model on each gene
#fit = (dream( vobjDream, form, metadata4deg ))

# Results generated
#res_cultured_uncultured <- data.frame(topTable(fit, coef='stimulationununstim', number=nrow(genes_counts_cultured), sort.by = "p"), check.names = F)
#save(res_uncultured_cultured, file ="res_uncultured_cultured.Rdata")

DE genes 15%

load("~/Documents/MiGASti/Databases/res_uncultured_cultured.Rdata")
load("~/Downloads/genes_counts_cultured.Rdata")
res_cult <- res_uncultured_cultured
gencode_30 = read.table("~/Documents/MiGASti/Databases/ens.geneid.gencode.v30")
colnames(gencode_30) = c("ensembl","symbol")
res_cult <- tibble::rownames_to_column(res_cult, "ensembl")
res_name_cult <- merge(res_cult, gencode_30, by = "ensembl")
sign_cult <- subset(res_name_cult, adj.P.Val < 0.15)
length(rownames(sign_cult))
## [1] 14250

DE genes 10%

sign_cult <- subset(res_name_cult, adj.P.Val < 0.10)
sign_cult
length(rownames(sign_cult))
## [1] 13638

DE genes 5%

sign_cult <- subset(res_name_cult, adj.P.Val < 0.05)
sign_cult
length(rownames(sign_cult))
## [1] 12615

FDR distibution

res = res_name_cult
p = ggplot(res, aes(P.Value))
p + geom_density(color="darkblue", fill="lightblue") +
theme_classic() +
ggtitle("FDR Distribution")

Fold change distribution

p = ggplot(res, aes(logFC))
p + geom_density(color = "darkblue", fill = "lightblue") +
theme_classic() +
ggtitle("Fold Change Distribution")

MA plot

plot.data = res
plot.data$id = rownames(plot.data)
data = data.frame(plot.data)
data$P.Value = -log10(data$P.Value)
data$fifteen = as.factor(abs(data$adj.P.Val < 0.05))
ma = ggplot(data, aes(AveExpr, logFC, color = fifteen))
ma + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c ("> 0.05", "< 0.05")) +
labs(title = "MA plot", color = "labels") +
theme_classic()

#theme(plot.title = element_text(hjust = 0.5)) + ylim (-10,10) + xlim(-4,22)

Volcano plot

vp = ggplot(data, aes(logFC, P.Value, color = fifteen))
vp + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c("> 0.05", "< 0.05")) +
labs(title = "Gene Level Volcano Plot", color = "FDR") +
#theme(plot.title = element_text(hjust = 0.5)) +
theme_classic() +
xlim(-10,10) + ylim(0, 10) + ylab("-log10 pvalue")
## Warning: Removed 2319 rows containing missing values (geom_point).

load("~/Documents/MiGASti/docs/res_name_LPS2.Rdata")

Boxplot top 6 genes

#note. pvalues based on wilcox test of log2(tpm)+1 without additional correction(not based on pvalue DEGs with DREAM)

#set rownames to Sample
genes_tpm_filt = genes_tpm[, !colnames(genes_tpm) %in% exclude] 
#Excludes the samples from filters. 
#dim(genes_tpm_filt)
metadata_filt = metadata[ !(rownames(metadata) %in% exclude), ]
length(metadata_filt)
## [1] 38
gencode_30 <- read.delim("~/Documents/MiGASti/Databases/ens.geneid.gencode.v30")
colnames(gencode_30) = c("ensembl","symbol")
genes_tpm_filt = log2((genes_tpm_filt) + 1)
genes_tpm_filt <- as.data.frame(genes_tpm_filt)
genes_tpm_ordered <- genes_tpm_filt[,rownames(metadata_filt)]
#head(genes_tpm_ordered)
all(rownames(metadata_filt) == colnames (genes_tpm_ordered)) #TRUE
## [1] TRUE
samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "ununstim"] # Includes same donor 
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor  

metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]

genes_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)

#create top 6 genes from DEG
deg_lists = res_name_cult[order(res_name_cult$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)
#create dataframe
gene2check = as.data.frame(genes_baseline_condition[top_6$ensembl, ])

#merge dataframe with symbol (gene_id)
gene2check$ensembl = rownames(gene2check)
top_6 <- merge(gencode_30, top_6, by = "ensembl")
gene2check = merge(gene2check, top_6[, c("symbol.y", "ensembl")], by = "ensembl")

names(metadata4deg) = tolower(names(metadata4deg))

#create dataframe for plots
gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol.y"))
## Warning in melt(gene2check, id.vars = c("ensembl", "symbol.y")): The melt
## generic in data.table has been passed a data.frame and will attempt to redirect
## to the relevant reshape2 method; please note that reshape2 is deprecated, and
## this redirection is now deprecated as well. To continue using melt methods from
## reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
## namespace like reshape2::melt(gene2check). In the next version, this warning
## will become an error.
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")

# show direction of effect for male versus female
ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
  geom_boxplot(notch = F, na.rm = T) +
  geom_jitter() +
  theme_bw() + facet_wrap(~symbol.y, scales = "free_y") +
  ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")
## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4

Data table for download

res_cult_diff_top = res_name_cult[, c("ensembl", "symbol", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "z.std")]
createDT(res_cult_diff_top)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html